Celestin Apprentice 5

home *** CD-ROM | disk | FTP | other *** search

/ Celestin Apprentice 5 / Apprentice-Release5.iso / Source Code / C / Applications / Python 1.3.3 / Python 133 68K / Demo / www / wwwlib.py < prev next >

Wrap

Text File | 1996-05-20 | 9KB | 318 lines

# WWW protocol interface # Default home page WWW_HOME = 'http://info.cern.ch:80/default.html' # Exception raised when an address leads nowhere BadAddress = 'wwwlib.BadAddress' # Parse an "anchoraddr", return (scheme, host, port, path, search, anchor) # # Note that search and anchor *include* the leading '?' or '#', # but host and port don't include the leading '//' or ':' and scheme # excludes the trailing ':'. # # (Some address forms don't support all parts, and some have # additional parts; yet this does something useful for all formats.) # import regex schemeprog = regex.compile('^[^/:?#]*:') hostportprog = regex.compile('//[^/:?#]*\(:[0-9]*\)?') portprog = regex.compile(':[0-9]*') searchprog = regex.compile('\?[^#]*') anchorprog = regex.compile('#.*') # def parse_addr(anchoraddr): i = anchorprog.search(anchoraddr) if i >= 0: anchor = anchoraddr[i:] docaddr = anchoraddr[:i] else: anchor = '' docaddr = anchoraddr # i = schemeprog.match(docaddr) if i >= 0: scheme = docaddr[:i-1] path = docaddr[i:] else: scheme = '' path = docaddr # if path[:2] == '//': i = hostportprog.match(path) hostport = path[2:i] path = path[i:] else: hostport = '' # i = portprog.search(hostport) if i >= 0: host = hostport[:i] port = hostport[i+1:] else: host = hostport port = '' # i = searchprog.search(path) if i >= 0: search = path[i:] path = path[:i] else: search = '' # return scheme, host, port, path, search, anchor # Convert a (scheme, ..., anchor) tuple back into an address string def unparse_addr(scheme, host, port, path, search, anchor): a = '' if scheme: a = a + scheme + ':' if host or port: a = a + '//' + host if port: a = a + ':' + port if path and path[0] <> '/': a = a + '/' a = a + path if search: if search[0] <> '?': search = '?' + search a = a + search if anchor: if anchor[0] <> '#': anchor = '#' + anchor a = a + anchor return a # Turn a relative address into a full address given the parent's address def full_addr(parent, addr, *isindex): pscheme, phost, pport, ppath, psearch, panchor = parse_addr(parent) if not pscheme: pscheme = 'file' if phost[-1:] == '.': phost = phost[:-1] if pscheme == 'http' and pport in ('80', '2784') or pscheme == 'file': pport = '' # XXX scheme, host, port, path, search, anchor = parse_addr(addr) if not scheme: scheme = pscheme if scheme == pscheme: if not host: host = phost if not port: port = pport if host[-1:] == '.': host = host[:-1] if not path: path = ppath elif path[0] <> '/' and '/' in ppath: import posixpath i = len(ppath) while i > 0 and ppath[i-1] <> '/': i = i-1 path = ppath[:i] + path path = posixpath.normpath(path) if scheme == 'http' and port in ('80', '2784'): port = '' # XXX full = unparse_addr(scheme, host, port, path, search, anchor) return full # Get a document, given its (full) address # (XXX This doesn't support the full range of address schemes yet, # and of course the "telnet" scheme needs special treatment anyway.) def get_document(addr): scheme, host, port, path, search, anchor = parse_addr(addr) # if scheme == 'http': import httplib, socket, string if port: try: port = string.atoi(port) except string.atoi_error: raise BadAddress, 'bad port number' if search: if search[0] <> '?': search = '?' + search path = path + search try: data = httplib.get_htmlfile(host, port, path) except socket.error, msg: if type(msg) <> type(''): msg = `msg` raise BadAddress, msg # Interpret a short single-line document with no tags # as an error message if len(data) <= 100 and '<' not in data and \ '\n' not in data[:-1]: if data[-2:] == '\r\n': data = data[:-2] elif data[-1:] == '\n': data = data[:-1] raise BadAddress, data return data # if (scheme == 'file' or scheme == 'ftp' or scheme == ''): import socket isdir = 0 if host <> '' and socket.gethostbyname(host) <> \ socket.gethostbyname(socket.gethostname()): import ftplib class C: def add(self, x): self.data = self.data + (x + '\n') x = C() x.data = '' try: ftp = ftplib.FTP(host) except ftplib.all_errors: raise BadAddress, \ 'ftp connect to ' + host + ' failed' try: ftp.login() # user anonymous, passwd user@host except ftplib.all_errors: raise BadAddress, \ 'ftp login on ' + host + ' failed' # First try retrieving as a file, then listing # as a directory try: ftp.retrlines('RETR ' + path, x.add) data = x.data except ftplib.error_perm: try: ftp.voidcmd('CWD ' + path) names = ftp.nlst() except ftplib.all_errors: raise BadAddress, \ 'ftp can\'t find ' + path + \ ' on ' + host data = names_to_html(host, path, names, 0) isdir = 1 except ftplib.all_errors: raise BadAddress, \ 'ftp retrieve ' + path + ' from ' + \ host + ' failed' else: import os if os.path.isdir(path): names = os.listdir(path) if os.curdir in names: names.remove(os.curdir) if os.pardir in names: names.remove(os.pardir) data = names_to_html(host, path, names, 1) isdir = 1 else: try: data = open(path, 'r').read() except IOError, msg: raise BadAddress, \ 'can\'t open local file ' + \ `path` + ' : ' + `msg` if not isdir and path[-5:] <> '.html': data = '<PLAINTEXT>\n' + data return data # if scheme == 'gopher': import string import gopherlib if len(path) >= 2 and path[0] == '/': gtype = path[1] selector = path[2:] else: selector = '' gtype = gopherlib.A_DIRECTORY # Convert '%' escapes in selectors while '%' in selector: i = string.index(selector, '%') xx = selector[i+1:i+3] try: n = eval('0x' + xx) c = chr(n) except: c = '' selector = selector[:i] + c + selector[i+3:] if gtype == gopherlib.A_FILE: f = gopherlib.send_selector(selector, host, port) lines = gopherlib.get_textfile(f) lines.append('') data = string.joinfields(lines, '\n') return '<PLAINTEXT>\n' + data if gtype == gopherlib.A_DIRECTORY: f = gopherlib.send_selector(selector, host, port) directory = gopherlib.get_directory(f) return gopher_to_html(directory) if gtype == gopherlib.A_HTML: f = gopherlib.send_selector(selector, host, port) lines = gopherlib.get_textfile(f) lines.append('') data = string.joinfields(lines, '\n') return data if gtype == gopherlib.A_INDEX: if not search: return '<ISINDEX>\n' + \ '<TITLE>Gopher index</TITLE>\n' + \ '<H1>Enter search keywords</H1>\n' words = string.splitfields(search, '+') selector = selector + '\t' + string.join(words) f = gopherlib.send_selector(selector, host, port) directory = gopherlib.get_directory(f) return gopher_to_html(directory) raise BadAddress, 'unsupported gopher type ' + `gtype` # raise BadAddress, 'unsupported scheme ' + `scheme` # Convert a Gopher directory to HTML def gopher_to_html(directory): import string import gopherlib data = '<TITLE>Gopher menu</TITLE>\n<H1>Gopher menu</H1>\n<UL>\n' for item in directory: [gtype, userstring, selector, host, port] = item[:5] if gtype == gopherlib.A_ERROR: text = '<LI>' + userstring + '\n' else: # Convert spaces in selectors to '%' escapes i = 0 while i < len(selector): c = selector[i] if c in ' <>&?#%': c = '%' + hex(0x100|ord(c))[-2:] selector = selector[:i] + c + \ selector[i+1:] i = i + len(c) text = '<LI><A HREF=gopher://' + host + ':' + port + \ '/' + gtype + selector + '>' + \ userstring + '</A>\n' typestr = gopherlib.type_to_name(gtype) text = text + typestr + '\n' data = data + text data = data + '</UL>\n' return data # Convert a unix directory listing to HTML def names_to_html(host, path, names, local): import string, os prefix = '<LI><A HREF=file:' if host: prefix = prefix + '//' + host if path[:1] <> '/': path = '/' + path if not path: path = '.' if host: header = 'Directory ' + path + ' on ' + host else: header = 'Local directory ' + path data = '<H1>' + header + '</H1>\n<DIR COMPACT>\n' if path[-1:] <> '/': path = path + '/' if path not in ('/', './'): dirname = os.path.normpath(path + '../') text = prefix + dirname + '>../</A>\n' data = data + text prefix = prefix + path names.sort() for name in names: if local and os.path.isdir(path + name): name = name + '/' text = prefix + name + '>' + name + '</A>\n' data = data + text data = data + '</DIR>' return data # Main program for simple testing def test(): import sys if not sys.argv[1:]: sys.argv.append(WWW_HOME) for path in sys.argv[1:]: print get_document(path)